Using the inherently unexciting topic of ✨postcodes✨….
To talk about some useful R packages
{duckdb}{sf}{geos}We will not be producing statistics below country level because the use of interpolation for smaller areas is additionally challenging, with the quality of the resulting values liable to be lower. We therefore recommend that comparisons between local areas in different parts of the UK should be based on the respective 2021 and 2022 census data. Users should, however, be aware of any factors that may complicate direct comparability between those dates.
Shout out to Aalasdair Rae’s Greggs spider map… https://alasdairrae.github.io/steakbakespider/
# A tibble: 2,552 × 7
shop_name address_house_number…¹ address_street_name address_city
<chr> <chr> <chr> <chr>
1 Bargoed, U5 The Plat… "U5" The Plateau Bargoed
2 Rotherham, 2 Effingh… "" 2/6 Effingham Stre… Rotherham
3 Nottingham, U32 Vict… "U32 Victoria Centre" Lower Parliament S… Nottingham
4 Birmingham, 85 New St "" 85 New Street Birmingham
5 Daventry, U20 Bowen … "U20" Bowen Square Daventry
6 Birmingham, U38 Grea… "" 37/38 Great Wester… Birmingham
7 Bletchley, 108 Queen… "" 108 Queensway Bletchley
8 Rushden, 40 High St "" 40 High Street Rushden
9 Outlet: Leicester, 2… "" 237C Uppingham Road Leicester
10 Llanharan, U3 Bridge… "U3" Bridgend Road Llanharan
# ℹ 2,542 more rows
# ℹ abbreviated name: ¹address_house_number_or_name
# ℹ 3 more variables: address_post_code <chr>, address_longitude <dbl>,
# address_latitude <dbl>
greggs_table <- greggs_data |>
select(-c("address_longitude", "address_latitude"))
as_tibble(greggs_table)# A tibble: 2,552 × 5
shop_name address_house_number…¹ address_street_name address_city
<chr> <chr> <chr> <chr>
1 Bargoed, U5 The Plat… "U5" The Plateau Bargoed
2 Rotherham, 2 Effingh… "" 2/6 Effingham Stre… Rotherham
3 Nottingham, U32 Vict… "U32 Victoria Centre" Lower Parliament S… Nottingham
4 Birmingham, 85 New St "" 85 New Street Birmingham
5 Daventry, U20 Bowen … "U20" Bowen Square Daventry
6 Birmingham, U38 Grea… "" 37/38 Great Wester… Birmingham
7 Bletchley, 108 Queen… "" 108 Queensway Bletchley
8 Rushden, 40 High St "" 40 High Street Rushden
9 Outlet: Leicester, 2… "" 237C Uppingham Road Leicester
10 Llanharan, U3 Bridge… "U3" Bridgend Road Llanharan
# ℹ 2,542 more rows
# ℹ abbreviated name: ¹address_house_number_or_name
# ℹ 1 more variable: address_post_code <chr>
{readr} & {dplyr}{duckdb} & {dbplyr}tic()
# Create duckdb connection
con <- dbConnect(duckdb())
# Register Greggs data to duckdb, so we can use it in the duckdb pipeline
duckdb_register(con, "greggs_table", greggs_table)
# Carry out the join within duckdb
greggs_coords <- tbl(con, "greggs_table") |>
mutate(pcds = str_remove_all(address_post_code, " ")) |>
left_join(
tbl(con, "data/NSPL_FEB_2025_UK.csv") |>
select(pcds, long, lat) |>
mutate(pcds = str_remove_all(pcds, " ")),
by = "pcds"
) |>
collect()
toc()1.75 sec elapsed
~7x faster in this example - benefits scale on larger datasets
greggs_coords_sf <- greggs_coords |>
st_as_sf(coords = c("long","lat"), # specify X and Y columns
na.fail = FALSE,
crs = 4326) |>
st_transform(27700) |>
arrange(shop_name)
distance_from_API_location <- st_distance(
greggs_sf |> st_transform(27700) |> arrange(shop_name),
greggs_coords_sf,
by_element = TRUE
)
# add distances to both tables for easier filtering when checking geocodes look reasonable:
greggs_coords_sf$m_from_true_location <- round(
as.numeric(distance_from_API_location), 2
)
top5_discrepancies <- greggs_coords_sf |>
select(shop_name, address_street_name, address_post_code, m_from_true_location) |>
arrange(desc(m_from_true_location)) |>
slice_head(n = 5)
top5_discrepanciesSimple feature collection with 5 features and 4 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 140498.4 ymin: 279437.9 xmax: 453506 ymax: 695112.3
Projected CRS: OSGB36 / British National Grid
# A tibble: 5 × 5
shop_name address_street_name address_post_code m_from_true_location
<chr> <chr> <chr> <dbl>
1 EG On The Move Gib… Watling Street LE17 6AR 22503.
2 ASDA EXPRESS Kimme… 55 Expressway LL18 5XE 14220.
3 BLAKEMORE Phoenix … 205 Nottingham Road NG16 1AE 4101.
4 ASDA EXPRESS Kilde… Kildean Business P… FK9 4UA 3696.
5 APPLEGREEN M1 Nort… Tullyacross BT27 5SG 2603.
# ℹ 1 more variable: geometry <POINT [m]>
# Read in UK boundary geometry -------------------------------------------------
bounds <- read_sf("data/Countries_December_2024_Boundaries_UK_BGC.gpkg") |>
st_union() |>
st_as_sf()
# Read in centroids geopackage--------------------------------------------------
centroid_path <- "data/NSPL_Online_Latest_Centroids_2025-03-12.gpkg"tic()
centroids <- sf::read_sf(
centroid_path,
# Exclude terminated postcodes (whether they have a grid ref or not),
# those with no grid ref, and 'Large user' postcodes (business/PO box ones?)
query = "SELECT PCDS, LONG, LAT, SHAPE FROM NSPL_LATEST_UK
WHERE OSGRDIND NOT IN (8, 9)
AND USERTYPE = 0
AND DOTERM IS NULL"
) |>
clean_names() |>
mutate(pcds_district = word(pcds, 1) |> str_squish())
sf_voronoi <- centroids |>
# Create voronoi polygons for each postcode via sf -----------------------------
st_geometry() |> # to get sfc from sf
st_union() |> # to get a sfc of MULTIPOINT type
sf::st_voronoi(envelope = st_geometry(bounds)) |>
st_collection_extract(type = "POLYGON") |> # a list of polygons
st_sf() |>
st_join(centroids) # add attribute cols back in
# rename geometry column
# (otherwise it'll be named after the functions that produced it.)
st_geometry(sf_voronoi) <- "geometry"
toc(){geos} instead{geos} provides access to the GEOS C library through R.
geos_bounds <- bounds |> as_geos_geometry()
message("Voronoi union via {geos}:")
tic()
geos_voronoi <- centroids |>
as_geos_geometry() |>
geos_make_collection() |>
geos_voronoi_polygons(
env = geos_bounds
) |>
st_as_sf() |>
st_collection_extract(type = "POLYGON") |>
# get attributes back
st_join(centroids) |>
as_tibble() |>
group_by(pcds_district) |>
summarise(
geometry = geometry |>
geos_make_collection() |>
geos_unary_union()
) |>
# convert from tibble back to sf object
st_as_sf() |>
# trim to UK bounds or some borders extend out to sea
st_intersection(bounds)
toc()
# ~240 secondsCompared with Google Maps:
bristol_pop_oa21 <- read_sf(
"data/Output_Areas_2021_EW_BGC_V2.gpkg",
query = "SELECT OA21CD, SHAPE FROM OA_2021_EW_BGC_V2"
) |>
st_filter(geos_voronoi_bristol, .predicate = st_intersects) |>
inner_join(bristol_pop_table, by = "OA21CD")
tm_shape(bristol_pop_oa21) + tm_fill(col = "population",
style = "fisher") +
tm_shape(geos_voronoi |>
filter(str_detect(pcds_district, regex("^BS\\d$")))
) +
tm_borders() +tm_text("pcds_district")sf::st_interpolate_aw()https://www.ons.gov.uk/methodology/geography/geographicalproducts/postcodeproducts
https://geoportal.statistics.gov.uk/documents/8093d03408f04240a2f11a9d8913a45e/explore
Parry, Josiah. 2023. “R-Spatial Beyond Sf.” September 8, 2023. https://geocompx.org/post/2023/beyond-sf/
https://longair.net/blog/2021/08/23/open-data-gb-postcode-unit-boundaries/
https://grantmcdermott.com/posts/fast-geospatial-datatable-geos/
https://r-spatial.github.io/sf/reference/interpolate_aw.html
https://alasdairrae.github.io/steakbakespider/